##creating region variable based on DDP-advised regions
ruthdata <- ruthdata %>%
  mutate(region= as.character(case_when(state %in% c("IA", "IL", "IN", "MI", "MN", "OH", "WI") ~ "MidWest",
                state %in% c("AR", "MO", "NE", "OK", "TX") ~ "Mid-America",
                state %in% c("DC", "DE", "MD", "NJ", "NY", "PA", "VA", "WV") ~ "Mid-Atlantic",
                state %in% c("CT", "MA", "ME", "NH", "RI") ~  "New England",
                state%in% c("AZ", "CA", "CO", "ID", "MT", "NM", "NV", "OR", "UT", "WA") ~ "West",
                state %in% c("AL", "FL", "GA", "KY", "LA", "MS", "NC", "SC", "TN") ~ "South")) )
##Creating a visualization for reported employees over time
employment <- ggplot(ruthdata, aes(x = year, y = TotalEmployeeCnt, color = as.factor(BusinessName))) +
  geom_line()+
  theme(legend.position="none") +
  theme(plot.title = element_text(size = 10, face = "bold", hjust = .5),
        axis.title = element_text(size = 10, face = "bold"),
        plot.subtitle = element_text(size = 5, face = "italic", hjust = .5),
        axis.text.x = element_text(size = 10),
        strip.text = element_text(face = "bold", size = 5))+ 
  labs(y = "Total Employee Count",
       x = "Year",
       title = "Ballet Company Reported Employees",
       subtitle = "By Fiscal Year")+
  theme_bw()
ggplotly(employment)
##creating a visualization for volunteers over time
Volunteer <- ggplot(ruthdata, aes(x = year, y = TotalVolunteersCnt, color = as.factor(BusinessName))) +
  geom_line()+
  theme(legend.position="none") +
  theme(plot.title = element_text(size = 10, face = "bold", hjust = .5),
        axis.title = element_text(size = 10, face = "bold"),
        plot.subtitle = element_text(size = 5, face = "italic", hjust = .5),
        axis.text.x = element_text(size = 10),
        strip.text = element_text(face="bold",size = 5)) +
  labs(y = "Total Volunteer Count",
       x = "Year",
       title = "Ballet Company Reported Volunteers",
       subtitle = "By Fiscal Year")+
  theme_bw()
ggplotly(Volunteer)
##Filtering in interesting employment trends
lbordf <- ruthdata %>%
  filter(BusinessName %in% c("BALLET THEATER FOUNDATION", "OREGON BALLET THEATRE", "PACIFIC NORTHWEST BALLET ASSOCIATION", "BALLET ARKANSAS INC", "COLORADO BALLET", "WONDERBOUND", "TULSA BALLET THEATRE INC", "EUGENE BALLET COMPANY", "SACREMENTO BALLET", "BALLET ARIZONA", "SACREMENTO BALLET ASSOCIATION", "THE WASHINGTON BALLET", "BALLET THEATRE FOUNDATION INC"))
## Filtering in outliers in volunteer counts and/or trends
# These companies are the 5 smallest numbers in empminusvol (they have the largest negativr disparities between the number of employees and the number of volunteers)
lbordf1 <- ruthdata %>%
  filter(BusinessName %in% c("EUGENE BALLET COMPANY", "ATLANTA BALLET INC", "BALLET THEATRE OF DES MOINES", "THE STATE OF ALABAMA BALLET INC", "SARASOTA BALLET OF FLORIDA INC"))
##Visualizing the employment trends
Employment <- ggplot(lbordf, aes(x = year, y = TotalEmployeeCnt, color = as.factor(BusinessName))) +
  geom_line()+
  theme(legend.position="none") +
  theme(plot.title = element_text(size = 10, face = "bold", hjust = .5),
        axis.title = element_text(size = 10, face = "bold"),
        plot.subtitle = element_text(size = 5, face = "italic", hjust = .5),
        axis.text.x = element_text(size = 10),
        strip.text = element_text(face = "bold", size = 5))+ 
  labs(y = "Total Employee Count",
       x = "Year",
       title = "Ballet Company Reported Employees",
       subtitle = "By Fiscal Year")+
  theme_bw()
ggplotly(Employment)
##Visualizing volunteer counts for the employment companies
volunteer <- ggplot(lbordf, aes(x = year, y = TotalVolunteersCnt, color = as.factor(BusinessName))) +
  geom_line()+
  theme(legend.position="none") +
  theme(plot.title = element_text(size = 10, face = "bold", hjust = .5),
        axis.title = element_text(size = 10, face = "bold"),
        plot.subtitle = element_text(size = 5, face = "italic", hjust = .5),
        axis.text.x = element_text(size = 10),
        strip.text = element_text(face="bold",size = 5)) +
  labs(y = "Total Volunteer Count",
       x = "Year",
       title = "Ballet Company Reported Volunteers",
       subtitle = "By Fiscal Year")+
  theme_bw()
ggplotly(volunteer)
##looking at employment for the companies with volunteer outliers
Employment1 <- ggplot(lbordf1, aes(x = year, y = TotalEmployeeCnt, color = as.factor(BusinessName))) +
  geom_line()+
  theme(plot.title = element_text(size = 10, face = "bold", hjust = .5),
        axis.title = element_text(size = 10, face = "bold"),
        plot.subtitle = element_text(size = 5, face = "italic", hjust = .5),
        axis.text.x = element_text(size = 10),
        strip.text = element_text(face = "bold", size = 5))+ 
  labs(y = "Total Employee Count",
       x = "Year",
       title = "Ballet Company Reported Employees",
       subtitle = "By Fiscal Year")+
  theme_bw()
ggplotly(Employment1)
##Looking at trends over time for companies with volunteer outliers
volunteer1 <- ggplot(lbordf1, aes(x = year, y = TotalVolunteersCnt, color = as.factor(BusinessName))) +
  geom_line()+
  theme(plot.title = element_text(size = 10, face = "bold", hjust = .5),
        axis.title = element_text(size = 10, face = "bold"),
        plot.subtitle = element_text(size = 5, face = "italic", hjust = .5),
        axis.text.x = element_text(size = 10),
        strip.text = element_text(face="bold",size = 5)) +
  labs(y = "Total Volunteer Count",
       x = "Year",
       title = "Ballet Company Reported Volunteers",
       subtitle = "By Fiscal Year")+
  theme_bw()
ggplotly(volunteer1)
##Visualizing number of Employees Minus Volunteers Over Time
EmpSubVol <- ggplot(ruthdata, aes(x = year, y = empminusvol, color = as.factor(BusinessName))) +
  geom_line()+
  theme(legend.position="none") +
  theme(plot.title = element_text(size = 10, face = "bold", hjust = .5),
        axis.title = element_text(size = 10, face = "bold"),
        plot.subtitle = element_text(size = 5, face = "italic", hjust = .5),
        axis.text.x = element_text(size = 10),
        strip.text = element_text(face="bold",size = 5)) +
  labs(y = "Total Volunteers Subtracted from Total Employees",
       x = "Year",
       title = "Reliance on Unpaid Labor",
       subtitle = "By Fiscal Year")+
  theme_bw()
ggplotly(EmpSubVol)
#Looking at the correlation between the rank of employees and the rank of volunteers 
voloutliers <- ggplot(ruthdata, aes(x=emprank, y=volrank, color = as.factor(BusinessName))) +
  geom_point()+
   theme(legend.position="none")+
  theme_bw()
ggplotly(voloutliers)
#code to find most egregious outliers in emprank and volrank
ruthdata$uplaborrankdif <- ruthdata$volrank-ruthdata$emprank
#sorted this in descending order and collected the top 5% outliers

#summarize the descriptive stats for uplaborrankdif
ruthdata%>%
summarize(min=min(uplaborrankdif),
          max = max(uplaborrankdif),
          mean = mean(uplaborrankdif),
          median = median(uplaborrankdif))
##    min max      mean median
## 1 -188 116 -11.95597      0
#filtering years 
ruthdata2021 <- ruthdata%>%
  filter(year=="2021")

ruthdata2020 <- ruthdata%>%
  filter(year=="2020")

ruthdata2019 <- ruthdata%>%
  filter(year=="2019")

ruthdata2018 <- ruthdata%>%
  filter(year=="2018")

ruthdata2017 <- ruthdata%>%
  filter(year=="2017")

ruthdata2016 <- ruthdata%>%
  filter(year=="2016")

df20202021 <- ruthdata%>%
  filter(year == c("2020", "2021"))

df20172019 <- ruthdata%>%
  filter(year == c("2017", "2018", "2019"))
#summaries for uplabor over years
ruthdata2016%>%
summarize(min=min(uplaborrankdif),
          max = max(uplaborrankdif),
          mean = mean(uplaborrankdif),
          median = median(uplaborrankdif)
          )
##    min max      mean median
## 1 -178 106 -14.67742     -5
ruthdata2017%>%
summarize(min=min(uplaborrankdif),
          max = max(uplaborrankdif),
          median = median(uplaborrankdif),
          mean= mean(uplaborrankdif))
##    min max median      mean
## 1 -184 112      0 -10.81818
ruthdata2018%>%
summarize(min=min(uplaborrankdif),
          max = max(uplaborrankdif),
          median = median(uplaborrankdif),
          mean= mean(uplaborrankdif))
##    min max median      mean
## 1 -181 115      4 -9.271028
ruthdata2019%>%
summarize(min=min(uplaborrankdif),
          max = max(uplaborrankdif),
          median = median(uplaborrankdif),
          mean= mean(uplaborrankdif))
##    min max median      mean
## 1 -188 116      1 -7.678261
ruthdata2020%>%
summarize(min=min(uplaborrankdif),
          max = max(uplaborrankdif),
          median = median(uplaborrankdif),
          mean= mean(uplaborrankdif))
##    min max median   mean
## 1 -188  76     -1 -18.08
ruthdata2021%>%
summarize(min=min(uplaborrankdif),
          max = max(uplaborrankdif),
          median = median(uplaborrankdif),
          mean= mean(uplaborrankdif))
##    min max median      mean
## 1 -150  64   -2.5 -15.09091
#calculated weighted values for multiple year desc stats below
#Desc Stats for total volunteers over years
ruthdata2017%>%
summarize(min=min(TotalVolunteersCnt),
          max = max(TotalVolunteersCnt),
          median = median(TotalVolunteersCnt),
          mean= mean(TotalVolunteersCnt))
##   min  max median     mean
## 1   0 1500     75 137.8586
ruthdata2018%>%
summarize(min=min(TotalVolunteersCnt),
          max = max(TotalVolunteersCnt),
          median = median(TotalVolunteersCnt),
          mean= mean(TotalVolunteersCnt))
##   min  max median     mean
## 1   0 1500     68 140.2991
ruthdata2019%>%
summarize(min=min(TotalVolunteersCnt),
          max = max(TotalVolunteersCnt),
          median = median(TotalVolunteersCnt),
          mean= mean(TotalVolunteersCnt))
##   min  max median     mean
## 1   0 1500     75 139.9739
ruthdata2020%>%
summarize(min=min(TotalVolunteersCnt),
          max = max(TotalVolunteersCnt),
          median = median(TotalVolunteersCnt),
          mean= mean(TotalVolunteersCnt))
##   min  max median   mean
## 1   0 1500   55.5 127.81
ruthdata2021%>%
summarize(min=min(TotalVolunteersCnt),
          max = max(TotalVolunteersCnt),
          median = median(TotalVolunteersCnt),
          mean= mean(TotalVolunteersCnt))
##   min max median     mean
## 1   0 500   45.5 64.54545
#calculated weighted values for multiple year desc stats below
ruthdata2017%>%
summarize(min=min(TotalEmployeeCnt),
          max = max(TotalEmployeeCnt),
          median = median(TotalEmployeeCnt),
          mean= mean(TotalEmployeeCnt))
##   min max median     mean
## 1   0 869     35 121.9192
ruthdata2018%>%
summarize(min=min(TotalEmployeeCnt),
          max = max(TotalEmployeeCnt),
          median = median(TotalEmployeeCnt),
          mean= mean(TotalEmployeeCnt))
##   min  max median     mean
## 1   0 1330     34 119.2897
ruthdata2019%>%
summarize(min=min(TotalEmployeeCnt),
          max = max(TotalEmployeeCnt),
          median = median(TotalEmployeeCnt),
          mean= mean(TotalEmployeeCnt))
##   min  max median     mean
## 1   0 1452     32 116.5565
ruthdata2020%>%
summarize(min=min(TotalEmployeeCnt),
          max = max(TotalEmployeeCnt),
          median = median(TotalEmployeeCnt),
          mean= mean(TotalEmployeeCnt))
##   min  max median   mean
## 1   0 1451   40.5 128.73
ruthdata2021%>%
summarize(min(TotalEmployeeCnt),
          max = max(TotalEmployeeCnt),
          median = median(TotalEmployeeCnt),
          mean= mean(TotalEmployeeCnt))
##   min(TotalEmployeeCnt) max median mean
## 1                     0 333     27   67
#calculated weighted values for multiple year desc stats below
##Summary stats for range 2017-2019
df20172019%>%
summarize(min=min(TotalEmployeeCnt),
          max = max(TotalEmployeeCnt),
          median = median(TotalEmployeeCnt),
          mean= mean(TotalEmployeeCnt))
##   min max median     mean
## 1   0 869     46 130.0083
df20172019%>%
summarize(min=min(TotalVolunteersCnt),
          max = max(TotalVolunteersCnt),
          median = median(TotalVolunteersCnt),
          mean= mean(TotalVolunteersCnt))
##   min  max median     mean
## 1   0 1500     68 142.3471
df20172019%>%
summarize(min=min(uplaborrankdif),
          max = max(uplaborrankdif),
          median = median(uplaborrankdif),
          mean= mean(uplaborrankdif))
##    min max median      mean
## 1 -188 115     -2 -15.65289
df20202021%>%
summarize(min=min(TotalEmployeeCnt),
          max = max(TotalEmployeeCnt),
          median = median(TotalEmployeeCnt),
          mean= mean(TotalEmployeeCnt))
##   min max median     mean
## 1   0 856   40.5 93.16667
df20202021%>%
summarize(min=min(TotalVolunteersCnt),
          max = max(TotalVolunteersCnt),
          median = median(TotalVolunteersCnt),
          mean= mean(TotalVolunteersCnt))
##   min  max median     mean
## 1   0 1500   45.5 104.0667
df20202021%>%
summarize(min=min(uplaborrankdif),
          max = max(uplaborrankdif),
          median = median(uplaborrankdif),
          mean= mean(uplaborrankdif))
##    min max median      mean
## 1 -188  74   -0.5 -16.13333

Answer to question: “Which companies rely most on unpaid labor?”

Variable used: rank of number of volunteers minus rank of number of employees

MIN: -188 MAX: 116

MEAN: -11.95597 MEDIAN: 0

df <- select(ruthdata, c("BusinessName", "year", "region", "uplaborrankdif"))

Top 5% outliers in reiance on unpaid labor as judged by volrank minus emprank:

  1. Ballet Theater of Des Moines (2019) MidWest 2 Ballet Theater of Des Moines (2018) MidWest
  2. Ballet Theater of Des Moines (2017) MidWest
  3. Ballet Theater of Des Moines (2015) MidWest
  4. Ballet Theater of Des Moines (2016) MidWest
  5. Ballet Minnesota (2019) MidWest
  6. CALIFORNIA BALLET ASSOCIATION INC (2016) West
  7. CALIFORNIA BALLET ASSOCIATION INC (2015) West
  8. THE CHATTANOOGA BALLET (2015) South
  9. EUGENE BALLET COMPANY (2017) West
  10. BALLET PALM BEACH INC (2018) South
  11. BALLET PALM BEACH INC (2019) South
  12. EUGENE BALLET COMPANY (2019) West
  13. THE CHATTANOOGA BALLET (2017) South
  14. MADISON BALLET INC (2018) MidWest
  15. MADISON BALLET INC (2019) MidWest
  16. BALLET FRONTIER OF TEXAS (2018) Mid-America
  17. BALLET FRONTIER OF TEXAS (2019) Mid-America
  18. BALLET ARKANSAS INC (2020) Mid-America
  19. EUGENE BALLET COMPANY (2018) West
  20. EUGENE BALLET COMPANY (2020) West
  21. CALIFORNIA BALLET ASSOCIATION INC (2017) West
  22. THE CHATTANOOGA BALLET (2016) South
  23. THE GEORGIA BALLET INC (2015) South
  24. WONDERBOUND (2018) West
  25. WONDERBOUND (2015) West
  26. WONDERBOUND (2016) West
  27. BALLET FRONTIER OF TEXAS (2020) Mid-America
  28. MADISON BALLET INC (2016) MidWest
  29. MADISON BALLET INC (2017) MidWest
  30. MADISON BALLET INC (2020) MidWest

Answer to question: “In which regions is unpaid labor most common?”

11/31 35.5% - Midwest 6/31 19.4% - South 4/31 12.9% - Mid-America 10/31 32.3% - West 0/31 0% - Mid Atlantic 0/31 0% - New England

Answer to question: “Does the total number of employees and volunteers shift during the pandemic?”

note: mean values were WEIGHTED for number of obervations (i.e. a mean for a year with 110 observations was multiplied by 1.1 and a value for a year with 22 observations was multiplied by .22, etc)

Volrank minus Emprank MEANS: 2017-2019: -15.7 2020-2021: -16.1

Volrank minus Emprank MEDIANS: 2017-2019: -2 2020-2021: -.5

Volrank minus Emprank RANGES: 2017-2019: -188, 115 2020-2021: -188, 74

TOTAL EMPLOYEE MEANS: 2017-2019: 130 2020-2021: 93.2

TOTAL EMPLOYEE MEDIANS: 2017-2019: 46 2020-2021: 40.5

TOTAL EMPLOYEE RANGES: 2017-2019: 0-869 2020-2021: 0-856

TOTAL VOLUNTEER MEANS: 2017-2019: 142.3 2020-2021: 104

TOTAL VOLUNTEER MEDIANS: 2017-2019: 68 2020-2021: 45.5

TOTAL VOLUNTEER RANGES: 2017-2019: 0-1500 2020-2021: 0-1500

It appears that the difference, on average, in the median of total employees between 17-19 and 20-21 decreased by 36.8 employees and the median number of employees decreased by 5.5 on average, and the maximum value decreased by 13.
On average, it appears that the difference in the mean of total volunteers between 17-19 and 20-21 decreased by 38.3 volunteers and the median number of volunteers decreased by 22.5 volunteers , on average, but the range remained the same.

Finally, volrank minus emprank takes the independent rankings of each type of labor when compared with all other variables in the dataset. The higher this number is, the more the specific company relies on unpaid labor than the other dance companies in the dataset. When looking at the difference between volrank minus emprank, it appears that the difference in the mean of rank difference between 17-19 and 20-21 decreased by .4 and the median difference in rank increased by 1.5, on average, and the maximum difference in the two rankings decreased by 41.

Findings suggest that the pandemic saw a decline in both paid (employee) labor and unpaid (volunteer) labor, but that the most drastic loss of labor occured for volunteers rather than employees.